Calgary Shared Mobility Pilot Trips Analysis

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly
import datetime as dt
#import altair as alt
from pathlib import Path
import os
In [2]:
# # Filter all warnings.
# # spurious warnings.
# import warnings
# warnings.filterwarnings('ignore')
In [3]:
from IPython.display import HTML

# Toggle button to hide the code from the notebook.
# The injected script defines code_toggle(), which hides or shows every
# `div.input` element and then flips the `code_show` flag. It is registered to run
# once when the document is ready (so the inputs start hidden), and the form's
# submit button calls it again on each click.
# NOTE(review): the script relies on `$` (jQuery) being available on the page,
# which is presumably the case in the classic notebook front end - confirm for
# other front ends.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[3]:
In [4]:
# Load the pre-formatted trips dataset from <project>/data/final/all_data.csv.
# The project root is taken to be the parent of the current working directory.
project_dir = Path().resolve().parents[0]
file_name = os.path.join(project_dir, 'data', 'final', 'all_data.csv')

# Read the CSV and convert both timestamp columns to pandas datetimes in one chain.
all_trips = pd.read_csv(file_name).assign(
    datetime=lambda frame: pd.to_datetime(frame['datetime']),
    start_date=lambda frame: pd.to_datetime(frame['start_date']),
)
In [5]:
# Mapbox Token
# The token is read from data/raw/mapbox.token rather than being hard-coded here.
# Path.read_text() opens and closes the file itself (the previous bare open().read()
# left the handle to the garbage collector), and strip() removes the trailing
# newline that would otherwise be passed along as part of the token.
px.set_mapbox_access_token(
    Path(project_dir, 'data', 'raw', 'mapbox.token').read_text().strip()
)

Some general questions

General questions that I wanted to ask with the data:

  • How popular are the bikes/scooters?
  • What are people doing with them? i.e. Where are they going? How far/how long are the trips?
  • Can we guess how much money has been spent on the scooters/bikes?
  • Using the data, can we identify a few archetypes of scooter users that explain how different people are using them?
  • How does the weather impact rental count?
In [6]:
# Preview the first five rows of the combined trips table.
all_trips.head(n=5)
Out[6]:
vehicle_type start_date start_hour start_day start_day_of_week trip_distance trip_duration startx starty endx ... a_dist travel_efficiency speed a_speed is_weekend is_holiday datetime Temp (°C) Wind Spd (km/h) Weather
0 scooter 2019-08-22 16 Thursday 4 338 129 -114.071462 51.046469 -114.071462 ... 62.040324 0.183551 9.432558 1.731358 0 0 2019-08-22 16:00:00 19.4 11.0 Mainly Clear
1 scooter 2019-09-13 23 Friday 5 1092 347 -114.073762 51.047304 -114.073762 ... 62.040324 0.056813 11.329107 0.643646 0 0 2019-09-13 23:00:00 9.7 9.0 Mainly Clear
2 scooter 2019-08-08 10 Thursday 4 2059 547 -114.255975 51.158312 -114.246686 ... 1622.721148 0.788111 13.551005 10.679700 0 0 2019-08-08 10:00:00 20.1 10.0 Clear
3 scooter 2019-08-08 11 Thursday 4 158 228 -114.071462 51.046469 -114.071462 ... 62.040324 0.392660 2.494737 0.979584 0 0 2019-08-08 11:00:00 22.2 20.0 Clear
4 scooter 2019-07-24 16 Wednesday 3 1009 308 -114.147194 51.007078 -114.149488 ... 186.139286 0.184479 11.793506 2.175654 0 0 2019-07-24 16:00:00 21.0 36.0 Mainly Clear

5 rows × 21 columns

In [7]:
# Number of trips per vehicle type.
# Only `start_date` is counted because it is the one column that gets plotted, and
# `labels` gives the axes readable names: the y axis is a trip count, not a date.
fig1 = px.bar(
    all_trips.groupby('vehicle_type')['start_date'].count().reset_index(),
    x="vehicle_type",
    y="start_date",
    color="vehicle_type",
    labels={'start_date': 'Number of Rentals', 'vehicle_type': 'Vehicle Type'},
)
fig1.show()
In [8]:
# Write the bar chart to a standalone HTML file; the call returns the file name.
plotly.offline.plot(figure_or_data=fig1, filename='file.html')
Out[8]:
'file.html'
In [9]:
# Rentals per day, one line per vehicle type. After count() every column holds the
# number of non-null rows in its group, so `a_dist` is used as the per-day tally.
daily_counts = all_trips.groupby(['start_date', 'vehicle_type']).count().reset_index()
fig2 = px.line(
    daily_counts,
    x="start_date",
    y='a_dist',
    color='vehicle_type',
    labels=dict(a_dist='Number of Rentals/Day', start_date='Date'),
)
fig2.show()

The rest of the analysis will just be on scooters.

In [10]:
# Keep only the rows whose vehicle type is 'scooter'.
scooter = all_trips.loc[all_trips['vehicle_type'].eq('scooter')]
In [11]:
# Scooter rentals per `datetime` value, zoomed to 18 Aug - 1 Sep 2019.
# `labels` keys must match column names exactly (they are case-sensitive): the
# previous key 'Datetime' did not match the 'datetime' column, so the x-axis label
# was never applied.
fig2 = px.line(scooter.groupby(['datetime']).count().reset_index(),
               x="datetime", y='a_dist',
               labels={'a_dist': 'Rentals/hr', 'datetime': 'Time'},
               range_x=[dt.date(2019, 8, 18), dt.date(2019, 9, 1)]
               )
fig2.show()
In [12]:
# One row per distinct `datetime` value; every other column holds the number of
# non-null scooter records for that timestamp.
scooter2 = scooter.groupby('datetime').count().reset_index()
In [13]:
import plotly.graph_objects as go

# Scooter rentals over time as an interactive line chart with zoom presets and a
# range slider underneath.
fig1 = go.Figure()
fig1.add_trace(go.Scatter(x=scooter2['datetime'],
                          y=scooter2['a_dist'].values.tolist(),
                          mode='lines',
                          opacity=1,
                          name='Scooter Rentals'))

# (count, step) pairs for the zoom presets. Each becomes a "backward" range-selector
# button labelled with the count plus the first letter of the step ("1d", "2m", ...).
range_presets = [(1, 'day'), (2, 'day'), (7, 'day'), (14, 'day'),
                 (1, 'month'), (2, 'month')]
range_buttons = [
    dict(count=count,
         label='{}{}'.format(count, step[0]),
         step=step,
         stepmode="backward")
    for count, step in range_presets
]
# Final button resets the view to the full series.
range_buttons.append(dict(step="all"))

# The x axis is configured in ONE place. Previously the title was set in a first
# update_layout call and the range controls were supplied afterwards as a separate
# go.layout.XAxis object, so the two specifications could overwrite one another.
fig1.update_layout(
    title_text="Number of Scooter Rentals per Hour",
    xaxis=dict(
        title='Date',
        rangeselector=dict(buttons=range_buttons),
        rangeslider=dict(visible=True),
        type="date",
    ),
    yaxis=dict(title='Rentals/hr'),
)

fig1.show()
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [14]:
# travel_efficiency against trip_distance, one marker per row of all_trips.
px.scatter(
    data_frame=all_trips,
    x="trip_distance",
    y="travel_efficiency",
)
In [ ]:
# Distribution of travel_efficiency, with the x axis limited to the 0-4 range.
px.histogram(
    data_frame=all_trips,
    x="travel_efficiency",
    range_x=[0, 4],
)
In [ ]:
# a_dist against trip_distance, coloured by trip_duration; both axes are clipped
# to 0-60000 and the markers are semi-transparent.
px.scatter(
    data_frame=all_trips,
    x="trip_distance",
    y="a_dist",
    color="trip_duration",
    opacity=0.5,
    range_x=[0, 60000],
    range_y=[0, 60000],
)
In [ ]:
# speed against trip_duration, one marker per row of all_trips.
px.scatter(
    data_frame=all_trips,
    x="trip_duration",
    y="speed",
)
In [ ]:
# a_speed against trip_duration, one marker per row of all_trips.
px.scatter(
    data_frame=all_trips,
    x="trip_duration",
    y="a_speed",
)
In [ ]: